#!/path/to/Rscript
# All input and output paths below are relative to this project directory.
# NOTE(review): hard-coded absolute path -- the script only runs on a machine
# where this directory exists.
setwd("/mnt/data/wr/2020work/2_cooperation/001_new.revise/2_disease")

# ---- Signal frequencies: pathogenic vs. benign ----
# Column 1 of the table is the signal name; columns 2 and 3 hold the values
# that are plotted as "patho" and "benign" respectively.
pb <- read.table("patho.benign.signal.txt", header = TRUE, sep = "\t")

# Normalise each class so that its values sum to 1 (per-class frequency).
data.p <- data.frame(signal = pb[, 1], fre = pb[, 2] / sum(pb[, 2]))
data.b <- data.frame(signal = pb[, 1], fre = pb[, 3] / sum(pb[, 3]))

# Long-format table for ggplot. The class label is attached to every row
# *before* sorting, so the labelling stays correct even if a signal name
# occurs more than once (labelling after the sort relied on each signal
# contributing exactly one patho row followed by one benign row).
# stringsAsFactors = FALSE keeps Label a character column so the fill order
# is alphabetical (benign, patho) on every R version.
data <- rbind(
  data.frame(Label = rep("patho", nrow(data.p)), data.p,
             stringsAsFactors = FALSE),
  data.frame(Label = rep("benign", nrow(data.b)), data.b,
             stringsAsFactors = FALSE)
)
# order() is stable, so within one signal the patho row stays first.
data <- data[order(data$signal), ]

library(ggplot2)

signal_plot <- ggplot(data = data, aes(x = signal, y = fre, fill = Label)) +
  labs(
    x = "Signal",
    y = "The frequency of mutations",
    title = "The difference between pathogenic and benign mutations"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 12, vjust = 0.5,
                              face = "bold")
  ) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("#FF9933", "#0066CC")) +
  expand_limits(y = c(0, 0.6))

# A ggplot object is only drawn when it is printed. Printing explicitly keeps
# the PDF from coming out empty when this script is run through source()
# instead of Rscript.
pdf("pathogenic.benign.signal.percentage.pdf", width = 8, height = 5)
print(signal_plot)
dev.off()

# ---- Per-signal enrichment tests ----
# Move the signal names into the row names so that `pb` holds only the two
# value columns (col 1 = patho, col 2 = benign).
rownames(pb) <- pb[, 1]
pb <- pb[, -1]

# For every signal: the class total minus the signal's own count, i.e. the
# mutations of that class that do NOT fall in this signal.
patho.N <- sum(pb[, 1]) - pb[, 1]
benign.N <- sum(pb[, 2]) - pb[, 2]
pb <- cbind(pb, patho.N, benign.N)

# One result row per signal: Fisher p-value, odds ratio, hypergeometric p.
p <- matrix(nrow = nrow(pb), ncol = 3)
# seq_len() yields an empty loop for an empty table, whereas 1:0 would
# iterate over c(1, 0) and fail on an out-of-bounds subscript.
for (i in seq_len(nrow(pb))) {
  # counts = (patho in signal, benign in signal, patho outside, benign outside)
  counts <- as.numeric(pb[i, ])
  # 2x2 table with rows Y/N (in / not in the signal) and columns patho/benign.
  tab <- t(matrix(counts, nrow = 2,
                  dimnames = list(c("patho", "benign"), c("Y", "N"))))
  # Run the test once and read both values from the same result object.
  ft <- fisher.test(tab, alternative = "greater")
  p[i, 1] <- ft$p.value
  p[i, 2] <- ft$estimate
  # Upper tail P(X >= patho-in-signal) for a draw of all patho mutations from
  # an urn with (in-signal) white and (not-in-signal) black balls.
  p[i, 3] <- phyper(counts[1] - 1,
                    counts[1] + counts[2],
                    counts[3] + counts[4],
                    counts[1] + counts[3],
                    lower.tail = FALSE)
}
rownames(p) <- rownames(pb)
colnames(p) <- c("fisher.p.value", "OR", "hyper.p.value")
write.table(p, file = "patho.benign.signal.pvalue.txt", quote = FALSE,
            append = FALSE, sep = "\t")

##################
# ---- Mutation frequency per position of the 6-mer ----
# Pathogenic table: column 1 = row label, column 2 = value per row.
p.6m <- read.table("path.6mer.txt", header = TRUE, sep = "\t")
# "<label>-<percent>%" strings for the pathogenic table (not used by the plot
# below; kept as script-level variables).
labels <- p.6m[, 1]
labels <- paste(labels, round(p.6m[, 2] / sum(p.6m[, 2]) * 100, 1), sep = "-")
labels <- paste0(labels, "%")
rownames(p.6m) <- p.6m[, 1]
# With a two-column table this drops to a plain numeric vector.
p.6m <- p.6m[, -1]
f.p <- as.matrix(p.6m / sum(p.6m))

# Benign table, processed the same way.
b.6m <- read.table("benign.6mer.txt", header = TRUE, sep = "\t")
plabels <- b.6m[, 1]
plabels <- paste(plabels, round(b.6m[, 2] / sum(b.6m[, 2]) * 100, 1),
                 sep = "-")
plabels <- paste0(plabels, "%")
rownames(b.6m) <- b.6m[, 1]
b.6m <- b.6m[, -1]
f.b <- as.matrix(b.6m / sum(b.6m))

# Long-format table: six pathogenic rows followed by six benign rows.
# NOTE(review): assumes both input tables have exactly six rows.
data <- data.frame(
  Position = rep(1:6, times = 2),
  Label = rep(c("patho", "benign"), each = 6),
  fre = c(f.p, f.b)
)

library(ggplot2)
library(plotrix)

sixmer_plot <- ggplot(data = data,
                      aes(x = factor(Position), y = fre, fill = Label)) +
  labs(
    x = "Position in 6-mer",
    y = "The frequency of mutations",
    title = "The difference between pathogenic and benign mutations"
  ) +
  theme_bw() +
  # The original call carried a stray empty argument (",,") here.
  theme(
    axis.text.x = element_text(size = 12),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 8, vjust = 0.5,
                              face = "bold")
  ) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
  coord_cartesian(ylim = c(0, 0.5)) +
  scale_fill_manual(values = c("#FF9933", "#0066CC")) +
  expand_limits(y = c(0, 0.5))

# Print explicitly so the PDF is also filled when the script is source()d.
pdf("plot.path.benign.6mer.pdf", width = 5, height = 5)
print(sixmer_plot)
dev.off()
# ---- Per-position tests for the 6-mer ----
# Column 1 = pathogenic value, column 2 = benign value, one row per position.
nf <- cbind(p.6m, b.6m)
# Class total minus the position's own value: mutations of that class that
# are NOT at this position.
nf.p <- sum(nf[, 1]) - nf[, 1]
nf.b <- sum(nf[, 2]) - nf[, 2]
nf <- cbind(nf, nf.p, nf.b)
rownames(nf) <- c("1", "2", "3", "4", "5", "6")
# NOTE(review): columns 3 and 4 actually hold the patho / benign complements
# (nf.p, nf.b); the names "Y" and "N" are kept from the original script.
colnames(nf) <- c("patho", "benign", "Y", "N")

# One result row per position: Fisher p-value, odds ratio, hypergeometric p.
p <- matrix(nrow = nrow(nf), ncol = 3)
for (i in seq_len(nrow(nf))) {
  # counts = (patho at pos, benign at pos, patho elsewhere, benign elsewhere)
  counts <- as.numeric(nf[i, ])
  # 2x2 table with rows Y/N (at / not at the position), columns patho/benign.
  tab <- t(matrix(counts, nrow = 2,
                  dimnames = list(c("patho", "benign"), c("Y", "N"))))
  # Two-sided Fisher test, run once; both values come from the same result.
  ft <- fisher.test(tab)
  p[i, 1] <- ft$p.value
  p[i, 2] <- ft$estimate
  # Upper tail P(X >= patho-at-position); note this is one-sided, unlike the
  # Fisher p-value in column 1.
  p[i, 3] <- phyper(counts[1] - 1,
                    counts[1] + counts[2],
                    counts[3] + counts[4],
                    counts[1] + counts[3],
                    lower.tail = FALSE)
}
rownames(p) <- rownames(nf)
colnames(p) <- c("fisher.p.value", "OR", "phyper.p.value")
write.table(p, file = "plot.path.benign.6mer.pvalue.txt", quote = FALSE,
            append = FALSE, sep = "\t")
